library(plyr)
library(dplyr)

# Set the working directory so the relative path below resolves.
# NOTE(review): "..." looks like a placeholder to be replaced with the
# project root before running -- confirm.
setwd("...")

# Load the saved R objects into the current environment.
# NOTE(review): presumably this provides the all<Metric>Matrix,
# all<Metric>MatrixSyn and all<Metric>MatrixDict lists that getMetrics()
# looks up by name -- verify against the script that wrote the file.
load("./data/predictionsAndTopics.RData")


### Note: All performance metrics are saved in lists.
# Each item in the list is a different model.
# Each row in an item is a text representation of the PDB data.
# Each column in an item is a trope. 

# Label vectors used by getMetrics() to annotate the performance tables.
# Text representations of the PDB data (five embedding sizes + bag of words).
textRep <- c(
  "d=16", "d=32", "d=64", "d=128", "d=256",
  "Bag of words"
)
# Predictive models, one per item of each metric list.
predModels <- c("C5.0", "MLP", "RF", "NB", "GBM")
# Tropes, one per column of each metric matrix.
tropes <- c("INF", "ANIM", "BELLIG", "IRR")

# Assemble one performance metric into a single labelled, sorted data frame.
#
# Looks up three lists by name -- all<theMetric>Matrix (no synonyms),
# all<theMetric>MatrixSyn (with synonyms) and all<theMetric>MatrixDict
# (synonyms only) -- row-binds each list, and labels every row with its
# model, text representation and synonym setting.
# NOTE(review): these lists are presumably provided by the RData file loaded
# at the top of the script -- confirm.
#
# Args:
#   theMetric: metric name spliced into the object names, e.g. "Kappa".
#
# Returns:
#   A data frame with the factor columns textRep, synonyms and model first,
#   followed by one column per entry of `tropes`; rows are sorted by textRep,
#   synonyms and model, and numeric columns are rounded to 3 decimals.
#
# Stops with an error when the stacked results do not line up with `tropes`,
# `predModels` or `textRep`, rather than letting R silently pad or recycle
# the labels (which would mislabel rows).
getMetrics <- function(theMetric) {
  # Row-bind a list of per-model results and attach the three label columns.
  # Each label vector must be a single value or have one entry per row.
  labelMetrics <- function(matrixList, modelLabels, textRepLabels,
                           synonymsLabel) {
    stacked <- as.data.frame(do.call("rbind", matrixList))

    if (ncol(stacked) != length(tropes)) {
      stop(
        "Metric '", theMetric, "': expected ", length(tropes),
        " trope columns but found ", ncol(stacked), ".",
        call. = FALSE
      )
    }
    labelLengths <- c(length(modelLabels), length(textRepLabels))
    if (!all(labelLengths %in% c(1L, nrow(stacked)))) {
      stop(
        "Metric '", theMetric, "': ", nrow(stacked),
        " result rows do not match the model/text-representation labels (",
        paste(labelLengths, collapse = " and "), " values).",
        call. = FALSE
      )
    }

    names(stacked) <- tropes
    stacked$model <- modelLabels
    stacked$textRep <- textRepLabels
    stacked$synonyms <- synonymsLabel
    stacked
  }

  allMatrix <- get(paste0("all", theMetric, "Matrix"))
  allMatrixSyn <- get(paste0("all", theMetric, "MatrixSyn"))
  allMatrixDict <- get(paste0("all", theMetric, "MatrixDict"))

  # Within each list the items are models and the rows of an item are text
  # representations, so model labels repeat block-wise and text
  # representation labels cycle.
  metrics <- labelMetrics(
    allMatrix,
    modelLabels = rep(predModels, each = nrow(allMatrix[[1]])),
    textRepLabels = rep(textRep, length(allMatrix)),
    synonymsLabel = "No"
  )
  metricsSyn <- labelMetrics(
    allMatrixSyn,
    modelLabels = rep(predModels, each = nrow(allMatrixSyn[[1]])),
    textRepLabels = rep(textRep, length(allMatrixSyn)),
    synonymsLabel = "Yes"
  )
  # Synonyms only: one row per model and no text representation.
  metricsDict <- labelMetrics(
    allMatrixDict,
    modelLabels = predModels,
    textRepLabels = "None",
    synonymsLabel = "Yes"
  )

  allMetrics <- rbind(metrics, metricsSyn, metricsDict)

  # Explicit factor levels fix the sort order used by arrange() below.
  allMetrics$textRep <- factor(
    allMetrics$textRep,
    levels = c("Bag of words", "d=16", "d=32", "d=64", "d=128", "d=256", "None")
  )
  allMetrics$synonyms <- factor(allMetrics$synonyms, levels = c("No", "Yes"))
  allMetrics$model <- factor(
    allMetrics$model,
    levels = c("C5.0", "GBM", "MLP", "NB", "RF")
  )

  # plyr is attached as well and also exports arrange() and mutate(), so the
  # dplyr verbs are namespace-qualified to stay independent of load order.
  allMetrics |>
    dplyr::select(textRep, synonyms, model, everything()) |>
    dplyr::arrange(textRep, synonyms, model) |>
    dplyr::mutate(across(where(is.numeric), \(x) round(x, 3)))
}

# Build one summary table per performance metric via getMetrics().
metricsKappa <- getMetrics("Kappa")
metricsF1 <- getMetrics("F1")
metricsAUC <- getMetrics("AUC")
metricsAcc <- getMetrics("Acc")

#### Table 2: Performance of highest-quality predictive models ####
# Return the row with the highest score for one trope, keeping only the
# identifying columns plus that trope's score. which.max() returns the first
# maximum, so ties resolve to the earliest row of the table.
bestModel <- function(metricTable, trope) {
  topRow <- which.max(metricTable[[trope]])
  metricTable[topRow, c("textRep", "synonyms", "model", trope)]
}

# Best configuration per trope by accuracy.
bestModel(metricsAcc, "INF")
bestModel(metricsAcc, "ANIM")
bestModel(metricsAcc, "BELLIG")
bestModel(metricsAcc, "IRR")

# Best configuration per trope by Cohen's kappa.
bestModel(metricsKappa, "INF")
bestModel(metricsKappa, "ANIM")
bestModel(metricsKappa, "BELLIG")
bestModel(metricsKappa, "IRR")
